/*============================================================================*/

* Do: Regressions Part 2 - LAPOP
* Last update: 09.05.2023

/*============================================================================*/

* ==============================================================================
* SECTION: FOLDERS
* ==============================================================================

*------------------------ SUBFOLDERS (DO NOT CHANGE)
* Input, output and analysis folders, each built from the root global.
* NOTE(review): the root global is not set in the lines above; presumably a
* master do-file defines it before this file runs - confirm.
global in "$root/Rawdata"
global out "$root/Final"
global analysis "$root/Analysis"

* ==============================================================================
* SECTION: DATA CONSTRUCTION (LAPOP REGRESSION SAMPLE)
* ==============================================================================
* Close any log that is still open (errors ignored), then start a fresh log
* for this do-file, overwriting a previous one with the same name.
capture log close
log using "datacreate_part2_lapop", replace

* District-level administrative counts.
* Builds running totals of numvz and requestptp within each district (ubigeo),
* ordered by year and month, keeps the March row of every year, and stores the
* result in the tempfile admin for the merge on ubigeo and year further below.
use "$in/district_long", clear
drop if year == 2021

* sum() is a running sum, so each row holds the cumulative count up to that
* year and month within the district.
bysort ubigeo (year month): generate totnumvz_ubigeo = sum(numvz)
bysort ubigeo (year month): generate totptp_ubigeo = sum(requestptp)

keep if month == 3

label variable totnumvz_ubigeo "Registered VZs"
label variable totptp_ubigeo "VZs w/ PTP"

* Variable names are written out in full so this step does not depend on
* variable-name abbreviation being switched on.
keep ubigeo year totnumvz_ubigeo totptp_ubigeo

tempfile admin
save `admin', replace

* Province-level administrative counts, stored in the tempfile adminp.
use "$in/province_long", clear
drop if year == 2021

* Running totals within province, ordered by year and month.
bysort province_id (year month): generate totnumvz_prov = sum(numvz)
bysort province_id (year month): generate totptp_prov = sum(requestptp)

* Keep only the March row of each year and the variables used in the merge.
keep if month == 3
keep province_id year totnumvz_prov totptp_prov

label variable totnumvz_prov "Registered VZs"
label variable totptp_prov "VZs w/ PTP"

tempfile adminp
save `adminp', replace

* Placebo data.
* Starts from the province totals still in memory, keeps only the registered
* count under a placebo name, and shifts year back by six so that each total
* is matched to the survey year six years before it was recorded.
drop totptp_prov

rename totnumvz_prov totnumvz_prov_placebom6
label variable totnumvz_prov_placebom6 "Registered VZs Six Years Later"

replace year = year - 6

tempfile adminpp
save `adminpp', replace

* Load the LAPOP survey data and attach the administrative counts.
* Merges use the documented many-to-one type m:1, and every variable is
* referred to by its full name rather than by an abbreviation.
use "$in/lapop", clear

* Keep working age individuals
keep if inrange(age, 14, 65)

* District totals: respondents whose district-year has no row in the
* administrative file (_merge == 1) get a count of zero.
merge m:1 ubigeo year using `admin', keep(match master)
replace totnumvz_ubigeo = 0 if _merge == 1
replace totptp_ubigeo = 0 if _merge == 1
drop _merge

* The province id is the first four characters of the district code.
generate province_id = substr(ubigeo, 1, 4)

* Province totals, with the same zero fill for unmatched province-years.
merge m:1 province_id year using `adminp', keep(match master)
replace totnumvz_prov = 0 if _merge == 1
replace totptp_prov = 0 if _merge == 1
drop _merge

* Placebo province totals: the zero fill applies only to survey years before
* 2015; unmatched rows from 2015 onwards stay missing.
merge m:1 province_id year using `adminpp', keep(match master)
replace totnumvz_prov_placebom6 = 0 if _merge == 1 & year < 2015
drop _merge

* Province-level information.
* Attaches population_prov17 and vzborn_prov07 by province, using the
* documented many-to-one merge type m:1, then builds the log-share regressors.
merge m:1 province_id using "$in/population_prov17", keep(match master)
drop _merge

* Provinces without a row in the vzborn file are given a count of zero.
merge m:1 province_id using "$in/vzborn_prov07", keep(match master)
replace vzborn_prov07 = 0 if _merge == 1
drop _merge

* Log of count over population, with one added to numerator and denominator
* so that the log is defined when a count is zero.
generate logsharevz = ln((totnumvz_prov + 1) / (population_prov17 + 1))
generate logsharevz07 = ln((vzborn_prov07 + 1) / (population_prov17 + 1))

generate logsharevz_placebom6 = ln((totnumvz_prov_placebom6 + 1) / (population_prov17 + 1))

* Convert the string identifiers to numeric now that all merges are done.
destring region_id ubigeo province_id, replace

* For each share measure, collapse the 2017 and 2019 columns into one variable
* and build a placebo version, then drop the year-specific columns.
foreach stem in share_saime share_ptp share_lapatilla share_combined {

	* Actual measure: the 2017 column in 2017, the 2019 column in 2019,
	* and zero in every other year.
	generate `stem' = cond(year == 2017, `stem'2017, cond(year == 2019, `stem'2019, 0))

	* Placebo measure: the 2017 column assigned to 2012 and the 2019 column
	* assigned to 2014, zero in every other year.
	generate `stem'_placebo11 = cond(year == 2012, `stem'2017, cond(year == 2014, `stem'2019, 0))

	* Remove the columns whose name is the stem plus exactly four characters.
	drop `stem'????
}

* Informal and formal employment rates: each employment count divided by
* workgage_ubigeo07.
generate informalemployrate07 = informalemploy_ubigeo07 / workgage_ubigeo07
generate formalemployrate07 = formalemploy_ubigeo07 / workgage_ubigeo07
 
* Outcomes
* Create a standardised copy (suffix _std) of every listed variable:
* subtract the sample mean and divide by the sample standard deviation.
foreach outcome of varlist trust* itrust1 opinion* share_combined share_combined_placebo11 informalemployrate07 formalemployrate07 {
	quietly summarize `outcome'
	quietly generate `outcome'_std = (`outcome' - r(mean)) / r(sd)
}

* Give the standardised outcomes descriptive names.
rename trust7_std trust_media
* Row mean of the two standardised items; rowmean is the documented name of
* the egen function formerly called rmean.
egen trust_police = rowmean(trust1_std trust3_std)
rename trust6_std trust_localgovt

rename itrust1_std trust_neighbors

rename opinion6_std quality_localservices
* The spelling of the next name is kept unchanged so that any code that reads
* the saved dataset continues to find the variable.
rename opinion7_std neighorhood_safety

* Indicators for the answer category recorded in countryprob.
* !missing() leaves the indicator missing for system and extended missing
* values alike, so coded non-responses are not counted as zeros.
generate problem_crime = inlist(countryprob, 5, 27) if !missing(countryprob)
generate problem_emp = inlist(countryprob, 1, 3) if !missing(countryprob)
generate problem_corrupt = inlist(countryprob, 13) if !missing(countryprob)

* Single numeric identifier for each region and year combination.
generate region_year = region_id*10000 + year


* Education groups
* Four-category recode of education1. Respondents with missing education1
* stay missing in education_group.
generate education_group = .
	replace education_group = 1 if education1 < 11 // less than secondary
	replace education_group = 2 if education1 == 11 // complete secondary
	replace education_group = 3 if education1 > 11 & education1 <= 13 // technical (complete+incomplete)
	* Missing values compare as larger than any number, so the upper category
	* needs an explicit guard to keep them out of the university group.
	replace education_group = 4 if education1 > 13 & !missing(education1) // university (undergraduate+graduate)
	label define education_group 1 "Less than secondary" 2 "Complete secondary" 3 "Technical" 4 "University", replace
	label values education_group education_group
	label variable education_group "Education"

* Employment indicator: 1 when employmentstatus is 1 or 2, 0 for any other
* recorded answer. Respondents with missing employmentstatus stay missing
* instead of being counted as 0, in line with the problem_* indicators above.
generate employment_status = inlist(employmentstatus, 1, 2) if !missing(employmentstatus)

* Marital status
* Three-category recode of marital into a new variable: code 1 becomes 3,
* codes 2 and 3 become 1, codes 4 to 6 become 2. Other values are copied as is.
recode marital (1 = 3) (2/3 = 1) (4/6 = 2), generate(marital_status)

label define ms 1 "Married/Cohabitation" 2 "Divorced/Widowed/Separated" 3 "Single"
label values marital_status ms

* Trends (linear)
* Years elapsed since 2006.
generate time = year - 2006

* NOTE(review): the rename assumes sex is coded so that it reads as a female
* indicator - confirm against the survey codebook.
rename sex female

* Rescale the duration by 24, 60 and 60 in turn.
* NOTE(review): presumably converts seconds into days - confirm the input unit.
replace duration_Border = duration_Border/24/60/60


*------------------------ LABELS
* Covariates
label variable employment_status "Employed"

* Write the finished dataset to the output folder and close the log.
save "$out/data_regressions_lapop", replace

log close
